Load preprocessed dataset (preprocessing code in data_preprocessing.Rmd)

# Report the dataset dimensions and the ASD/control split of the samples
glue('Number of genes: {nrow(datExpr)}\n',
     'Number of samples: {ncol(datExpr)} ({sum(datMeta$Diagnosis_ == "ASD")} ASD, ',
     '{sum(datMeta$Diagnosis_ != "ASD")} controls)')
## Number of genes: 30107
## Number of samples: 80 (45 ASD, 35 controls)

Keep only differentially expressed genes

5838 genes don’t have an adjusted p-value because their mean normalized counts fall below the optimal independent-filtering threshold, so they are going to be considered not significant

# Per-gene summary table: mean expression, SFARI membership, and DE status.
# DExpressed is NA for genes that received no adjusted p-value (see note above).
plot_data = data.frame('id' = rownames(datExpr),
                       'mean_expression' = rowMeans(datExpr),
                       'SFARI_score' = DE_info$`gene-score` != 'None',
                       'DExpressed' = DE_info$padj < 0.05)
# Density of mean expression, split by DE status (log-scaled x axis)
ggplotly(
  ggplot(plot_data, aes(x = mean_expression, fill = DExpressed, color = DExpressed)) +
    geom_density(alpha = 0.3) +
    scale_x_log10() +
    ggtitle('gene Mean Expression distribution') +
    theme_minimal()
)

We lose almost all of the genes with SFARI score

# Restrict to SFARI genes and inspect their mean-expression distribution by DE status
plot_data_SFARI = subset(plot_data, SFARI_score)
ggplotly(
  ggplot(plot_data_SFARI, aes(x = mean_expression, fill = DExpressed, color = DExpressed)) +
    geom_density(alpha = 0.3) +
    ggtitle('gene Mean Expression distribution for SFARI Genes') +
    scale_y_sqrt() +
    theme_minimal()
)
# Count SFARI genes by DE status, keeping the NA (no adjusted p-value) group visible
with(plot_data, table(DExpressed[SFARI_score], useNA = 'ifany'))
## 
## FALSE  TRUE  <NA> 
##   715   151    19
# Percentage of SFARI genes that fail the DE filter (NA p-values excluded from the mean)
print(paste0('Losing ', round(100*(1-mean(plot_data$DExpressed[plot_data$SFARI_score==TRUE], na.rm=T)),1), 
             '% of the genes with SFARI score'))
## [1] "Losing 82.6% of the genes with SFARI score"
# Keep only genes with a defined adjusted p-value below 0.05.
# Compute the keep-mask once instead of three times (the original repeated
# `plot_data$DExpressed & !is.na(plot_data$DExpressed)` for every object);
# NA & FALSE == FALSE, so genes without a p-value are dropped either way.
keep_genes = !is.na(plot_data$DExpressed) & plot_data$DExpressed
datExpr  = datExpr[keep_genes,]
DE_info  = DE_info[keep_genes,]
datGenes = datGenes[keep_genes,]

rm(plot_data, plot_data_SFARI, keep_genes)

Dimensionality reduction using PCA

To make calculations more efficient for the more time consuming methods, we can perform PCA and keep the first principal components. As it can be seen in the plot below, the first principal component explains almost all of the variance and after it there doesn’t seem to be a specific cutoff until after the 60th PC, when the variance explained becomes 0. So as an initial filtering I’m keeping the first 67 principal components.

# PCA on the expression matrix; pca$x has one row per gene (genes are the
# observations here), so the reduced representation keeps gene rownames
pca = prcomp(datExpr)

# Proportion of variance explained by each principal component
var_exp = data.frame('PC' = seq_len(ncol(datExpr)),
                     'var_explained' = summary(pca)$importance[2,])
ggplotly(
  ggplot(var_exp, aes(PC, var_explained)) +
    geom_point() +
    geom_vline(xintercept = 67.5, linetype = 'dashed', color = 'gray') +
    scale_y_sqrt() +
    theme_minimal() +
    ggtitle('% of variance explained by principal component')
)
# Keep the first 67 PCs as the reduced data set
datExpr_redDim = data.frame(pca$x)[, paste0('PC', 1:67)]

Clustering Methods

# Reference groupings taken from the gene annotations (not clustering algorithms):
# the SFARI score itself, SFARI membership, and syndromic status, named by gene ID
clusterings = list(
  'SFARI_score' = setNames(DE_info$`gene-score`, rownames(DE_info)),
  'SFARI_bool'  = setNames(DE_info$`gene-score` != 'None', rownames(DE_info)),
  'syndromic'   = setNames(DE_info$syndromic == 1, rownames(DE_info))
)



K-means clustering

# Elbow plot: total within-cluster sum of squares for k = 1..10
set.seed(123)
wss = vapply(1:10, function(k) {
  kmeans(datExpr, k, iter.max = 200, nstart = 25, algorithm = 'MacQueen')$tot.withinss
}, numeric(1))
plot(wss, type = 'b', main = 'K-Means Clustering')
best_k = 4
abline(v = best_k, col = 'blue')

# Final k-means run with the chosen k.
# NOTE(review): iter.max (100 vs 200) and the algorithm (default Hartigan-Wong
# vs MacQueen) differ from the elbow run above — confirm this is intentional.
datExpr_k_means = kmeans(datExpr, best_k, iter.max = 100, nstart = 25)
clusterings[['km']] = datExpr_k_means$cluster



Hierarchical Clustering

Chose k=10 as best number of clusters. SFARI genes seem to group in the last two clusters

# Hierarchical clustering of the genes on their pairwise distances
h_clusts = hclust(dist(datExpr))
plot(h_clusts, hang = -1, cex = 0.6, labels = FALSE)
abline(h = 19, col = 'blue')

best_k = 10

SFARI and Neuronal related genes seem to concentrate mainly in the aqua, blue and pink clusters

clusterings[['hc']] = cutree(h_clusts, best_k)

# Build a lookup table mapping each numeric SFARI score to a viridis colour.
# NOTE(review): assumes the scores coerce to numeric; non-numeric entries
# (e.g. 'None') would become NA and are skipped via na.rm — confirm.
create_viridis_dict = function(){
  scores = as.numeric(clusterings[['SFARI_score']])
  score_range = range(scores, na.rm = TRUE)
  viridis_score_cols = viridis(score_range[2] - score_range[1] + 1)
  names(viridis_score_cols) = seq(score_range[1], score_range[2])
  
  return(viridis_score_cols)
}

viridis_score_cols = create_viridis_dict()

# Annotation colour bars for the dendrogram, ordered to match the leaves
dend_meta = DE_info[match(labels(h_clusts), DE_info$ID),] %>% 
            transmute('SFARI_score' = viridis_score_cols[`gene-score`],                    # Purple: 2, Yellow: 6
                      'SFARI_bool' = ifelse(`gene-score` != 'None', '#21908CFF', 'white'), # Acqua
                      'Syndromic' = ifelse(syndromic == TRUE, 'orange', 'white'),
                      'Neuronal' = ifelse(ID %in% GO_neuronal$ID, '#666666','white'))

# Plot the dendrogram with coloured branches and the annotation bars below it.
# Bug fix: the labels must be blanked for every leaf of the GENE dendrogram —
# nrow(datMeta) is the number of samples, not genes, so use nrow(datExpr).
h_clusts %>% as.dendrogram %>% set('labels', rep('', nrow(datExpr))) %>% 
             set('branches_k_color', k=best_k) %>% plot
colored_bars(colors=dend_meta)



Consensus Clustering

Samples are grouped into two big clusters, and then many outliers.


*The rest of the output plots can be found in the Data/Gandal/consensusClustering/genes_DE/ folder

Independent Component Analysis

Following this paper’s guidelines:

  1. Run PCA and keep enough components to explain 60% of the variance (keeping 99.5% of the variance)

  2. Run ICA with that same number of nbComp as principal components kept to then filter them

  3. Select components with kurtosis > 3

  4. Assign genes to clusters with FDR<0.01 using the fdrtool package

Note: It takes too long to run ICA with all 67 principal components, so using the first 40 instead

# ICA on the PCA-reduced data with 40 components (JADE algorithm)
ICA_output = runICA(datExpr_redDim, nbComp=40, method='JADE')
# Keep only the source signals with kurtosis above 3 (step 3 of the guidelines)
signals_w_kurtosis = data.frame(ICA_output$S)[, apply(ICA_output$S, 2, kurtosis) > 3, drop=FALSE]
# A gene belongs to a component when its fdrtool q-value is below 0.01;
# drop components that claim at most one gene
ICA_clusters = data.frame(apply(signals_w_kurtosis, 2, function(x) fdrtool(x, plot=F, verbose=F)$qval < 0.01))
ICA_clusters = ICA_clusters[colSums(ICA_clusters) > 1]

# One label per gene: the last component (in column order) that claimed it
clusterings[['ICA_min']] = rep(NA, nrow(ICA_clusters))
ICA_clusters_names = colnames(ICA_clusters)
for(c in ICA_clusters_names) clusterings[['ICA_min']][ICA_clusters[,c]] = c

# Flag the genes that no component claimed
clusterings[['ICA_NA']] = is.na(clusterings[['ICA_min']])

Leaves 69% of the genes without a cluster

ICA_clusters %>% rowSums %>% table
## .
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13 
## 2075  282  189  134   95   63   50   42   30   19    6    8    4    3
# ICA_clusters %>% mutate(cl_sum=rowSums(.)) %>% as.matrix %>% melt %>% ggplot(aes(Var2, Var1)) + 
#   geom_tile(aes(fill=value)) + xlab('Clusters') + ylab('Samples') + 
#   theme_minimal() + theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) + coord_flip()

Trying again, this time with more components (nbComp = 50)

# Re-run ICA with 50 components
ICA_output = datExpr_redDim %>% runICA(nbComp=50, method='JADE')
# Bug fix: the original called names() on the logical vector `apply(...) > 3`,
# which returns EVERY component name regardless of its kurtosis, so the
# kurtosis > 3 filter was silently skipped. Subset by the logical mask as in
# the 40-component run so only leptokurtic components are kept.
signals_w_kurtosis = ICA_output$S %>% data.frame %>% dplyr::select(colnames(ICA_output$S)[apply(ICA_output$S, 2, kurtosis)>3])
ICA_clusters = apply(signals_w_kurtosis, 2, function(x) fdrtool(x, plot=F, verbose=F)$qval<0.01) %>% data.frame
ICA_clusters = ICA_clusters[colSums(ICA_clusters)>1]

Doesn’t make a big difference (67%), but it’s still better

ICA_clusters %>% rowSums %>% table
## .
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14 
## 2007  269  167  137  107   69   71   48   38   33   17   11   11    7    5 
##   16   17 
##    2    1
# Rebuild the per-gene labels for the 50-component run: the last component
# (in column order) that claimed a gene wins; NA when none did
clusterings[['ICA_min']] = rep(NA, nrow(ICA_clusters))
ICA_clusters_names = colnames(ICA_clusters)
for(c in ICA_clusters_names) clusterings[['ICA_min']][which(ICA_clusters[,c])] = c

clusterings[['ICA_NA']] = is.na(clusterings[['ICA_min']])

# ICA_clusters %>% mutate(cl_sum=rowSums(.)) %>% as.matrix %>% melt %>% ggplot(aes(Var2, Var1)) + 
#   geom_tile(aes(fill=value)) + xlab('Clusters') + ylab('Samples') + 
#   theme_minimal() + theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) + coord_flip()



WGCNA

Note: This method does not work with the reduced version of datExpr.

# Estimate the soft-thresholding power for WGCNA (genes as columns, hence t())
best_power = pickSoftThreshold(t(datExpr), powerVector = seq(1, 30, by=2))
##    Power SFT.R.sq  slope truncated.R.sq  mean.k. median.k.   max.k.
## 1      1  0.00342  0.261          0.988 7.75e+02  7.70e+02 1330.000
## 2      3  0.58100 -2.220          0.963 9.82e+01  9.03e+01  331.000
## 3      5  0.83500 -2.560          0.989 1.92e+01  1.58e+01  110.000
## 4      7  0.90000 -2.520          0.994 4.96e+00  3.49e+00   43.500
## 5      9  0.88700 -2.380          0.951 1.59e+00  9.14e-01   19.700
## 6     11  0.38500 -3.210          0.349 6.03e-01  2.66e-01    9.780
## 7     13  0.95700 -1.830          0.985 2.62e-01  8.50e-02    5.250
## 8     15  0.94200 -1.780          0.964 1.26e-01  2.95e-02    3.360
## 9     17  0.93400 -1.760          0.974 6.67e-02  1.08e-02    2.450
## 10    19  0.94800 -1.700          0.985 3.77e-02  4.11e-03    1.830
## 11    21  0.39700 -2.280          0.354 2.25e-02  1.65e-03    1.390
## 12    23  0.40700 -2.190          0.367 1.41e-02  6.55e-04    1.070
## 13    25  0.41200 -2.110          0.371 9.23e-03  2.72e-04    0.829
## 14    27  0.41400 -2.040          0.361 6.23e-03  1.15e-04    0.651
## 15    29  0.41300 -1.980          0.363 4.33e-03  4.94e-05    0.515
# Build the co-expression network with the estimated power; numeric module
# labels start at 0, which holds the unassigned genes
network = blockwiseModules(t(datExpr), power = best_power$powerEstimate, numericLabels = TRUE)
clusterings[['WGCNA']] = setNames(network$colors, rownames(datExpr))

It leaves 1211 genes without a cluster (~40%)

clusterings[['WGCNA']] %>% table
## .
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14 
## 1211  449  203  158  128  112  106  100   80   76   71   65   40   36   36 
##   15   16   17   18   19 
##   31   26   25   25   22



Gaussian Mixture Models with hard thresholding

The BIC decreases monotonically, but it seems to stabilise a bit at 14

# BIC curve to choose the number of GMM components
n_clust = Optimal_Clusters_GMM(datExpr, max_clusters = 40, criterion = 'BIC', plot_data = FALSE)
plot(n_clust, type = 'l', main = 'Bayesian Information Criterion to choose number of clusters')
abline(v = 14, col = 'blue')

best_k = 14
gmm = GMM(datExpr, best_k)
# Assign each gene to the component with the highest log-likelihood
clusterings[['GMM']] = apply(gmm$Log_likelihood, 1, which.max)



Manual Clustering

Separating the two clouds of points into two clusters

# Split the two clouds of points in PC1-PC2 space with a straight line:
# cluster 1 = points below the line, cluster 0 = points above it
intercept = -0.2
slope = 0.02
manual_clusters = as.factor(as.numeric(slope * datExpr_redDim$PC1 + intercept > datExpr_redDim$PC2))
names(manual_clusters) = rownames(datExpr_redDim)
clusterings[['Manual']] = manual_clusters

# PCA scatter plot coloured by the manual split, with the separating line
ggplot(datExpr_redDim, aes(PC1, PC2, color = manual_clusters)) +
  geom_point(alpha = 0.3) +
  xlab(paste0('PC1 (', round(100*summary(pca)$importance[2,1], 1), '%)')) +
  ylab(paste0('PC2 (', round(100*summary(pca)$importance[2,2], 1), '%)')) +
  geom_abline(intercept = intercept, slope = slope, color = 'gray') +
  theme_minimal() +
  ggtitle('PCA')

clusterings[['Manual']] %>% table
## .
##    0    1 
## 1382 1618
rm(intercept, slope, pca)

Both the aqua and the salmon clusters seem to be composed of three Gaussians in the Mean and SD plots.

# Per-gene mean and sd of the reduced representation, tagged with the manual
# cluster. NB: cbind() coerces the factor to its numeric codes, so the clusters
# are coded 1/2 in this table (vs 0/1 in clusterings[['Manual']]).
manual_clusters_data = data.frame(cbind(apply(datExpr_redDim, 1, mean),
                                        apply(datExpr_redDim, 1, sd),
                                        manual_clusters))
colnames(manual_clusters_data) = c('mean','sd','cluster')
manual_clusters_data = manual_clusters_data %>% mutate('cluster'=as.factor(cluster))
# Density of mean and sd, per manual cluster, side by side
p1 = ggplot(manual_clusters_data, aes(x = mean, color = cluster, fill = cluster)) +
  geom_density(alpha = 0.4) +
  theme_minimal()
p2 = ggplot(manual_clusters_data, aes(x = sd, color = cluster, fill = cluster)) +
  geom_density(alpha = 0.4) +
  theme_minimal()
grid.arrange(p1, p2, ncol = 2)

Separate genes into three Gaussians per cluster by their mean expression:

# Replicate ggplot2's default discrete palette: n hues equally spaced around
# the hcl colour wheel at chroma 100 and luminance 65
gg_colour_hue = function(n) {
  hue_angles = seq(15, 375, length = n + 1)[seq_len(n)]
  hcl(h = hue_angles, l = 65, c = 100)
}

# Fit a 3-component GMM to the mean expression of each manual cluster
c1_mean = dplyr::select(filter(manual_clusters_data, cluster == 1), mean)
rownames(c1_mean) = rownames(manual_clusters_data)[manual_clusters_data$cluster == '1']
gmm_c1_mean = GMM(c1_mean, 3)

c2_mean = dplyr::select(filter(manual_clusters_data, cluster == 2), mean)
rownames(c2_mean) = rownames(manual_clusters_data)[manual_clusters_data$cluster == '2']
gmm_c2_mean = GMM(c2_mean, 3)

# Label each gene '<manual cluster>_<gaussian>' using its most likely component
clusterings[['Manual_mean']] = as.character(clusterings[['Manual']])
clusterings[['Manual_mean']][clusterings[['Manual']] == 0] =
  apply(gmm_c1_mean$Log_likelihood, 1, function(x) paste0('1_', which.max(x)))
clusterings[['Manual_mean']][clusterings[['Manual']] == 1] =
  apply(gmm_c2_mean$Log_likelihood, 1, function(x) paste0('2_', which.max(x)))


# Overlay the six fitted Gaussians on the mean-expression axis.
# Bug fix: ClusterR's GMM returns VARIANCES in covariance_matrices, while
# dnorm() expects a standard deviation — take sqrt() of each variance.
plot_gaussians = manual_clusters_data %>% ggplot(aes(x=mean)) +
  stat_function(fun=dnorm, n=100, colour=gg_colour_hue(6)[1],
                args=list(mean=gmm_c1_mean$centroids[1], sd=sqrt(gmm_c1_mean$covariance_matrices[1]))) +
  stat_function(fun=dnorm, n=100, colour=gg_colour_hue(6)[2],
                args=list(mean=gmm_c1_mean$centroids[2], sd=sqrt(gmm_c1_mean$covariance_matrices[2]))) +
  stat_function(fun=dnorm, n=100, colour=gg_colour_hue(6)[3],
                args=list(mean=gmm_c1_mean$centroids[3], sd=sqrt(gmm_c1_mean$covariance_matrices[3]))) +
  stat_function(fun=dnorm, n=100, colour=gg_colour_hue(6)[4],
                args=list(mean=gmm_c2_mean$centroids[1], sd=sqrt(gmm_c2_mean$covariance_matrices[1]))) +
  stat_function(fun=dnorm, n=100, colour=gg_colour_hue(6)[5],
                args=list(mean=gmm_c2_mean$centroids[2], sd=sqrt(gmm_c2_mean$covariance_matrices[2]))) +
  stat_function(fun=dnorm, n=100, colour=gg_colour_hue(6)[6],
                args=list(mean=gmm_c2_mean$centroids[3], sd=sqrt(gmm_c2_mean$covariance_matrices[3]))) +
  theme_minimal()

# PCA scatter coloured by the mean-based sub-clusters
plot_points = datExpr_redDim %>% ggplot(aes_string(x='PC1', y='PC2', color=as.factor(clusterings[['Manual_mean']]))) + 
          geom_point() + theme_minimal()

grid.arrange(plot_gaussians, plot_points, ncol=2)

clusterings[['Manual_mean']] %>% table
## .
## 1_1 1_2 1_3 2_1 2_2 2_3 
## 472 340 570 347 583 688

Separate clusters into three Gaussians per cluster by their sd:

# Number of Gaussians fitted per manual cluster
n_clusters = 3

# Fit a GMM to the per-gene sd within each manual cluster
c1_sd = dplyr::select(filter(manual_clusters_data, cluster == 1), sd)
rownames(c1_sd) = rownames(manual_clusters_data)[manual_clusters_data$cluster == '1']
gmm_c1_sd = GMM(c1_sd, n_clusters)

c2_sd = dplyr::select(filter(manual_clusters_data, cluster == 2), sd)
rownames(c2_sd) = rownames(manual_clusters_data)[manual_clusters_data$cluster == '2']
gmm_c2_sd = GMM(c2_sd, n_clusters)

# Label each gene '<manual cluster>_<gaussian>' using its most likely component
clusterings[['Manual_sd']] = as.character(clusterings[['Manual']])
clusterings[['Manual_sd']][clusterings[['Manual']] == 0] =
  apply(gmm_c1_sd$Log_likelihood, 1, function(x) paste0('1_', which.max(x)))
clusterings[['Manual_sd']][clusterings[['Manual']] == 1] =
  apply(gmm_c2_sd$Log_likelihood, 1, function(x) paste0('2_', which.max(x)))


# Overlay the six fitted Gaussians on the sd axis.
# Bug fix: ClusterR's GMM returns VARIANCES in covariance_matrices, while
# dnorm() expects a standard deviation — take sqrt() of each variance.
plot_gaussians = manual_clusters_data %>% ggplot(aes(x=sd)) +
  stat_function(fun=dnorm, n=100, colour=gg_colour_hue(2*n_clusters)[1],
                args=list(mean=gmm_c1_sd$centroids[1], sd=sqrt(gmm_c1_sd$covariance_matrices[1]))) +
  stat_function(fun=dnorm, n=100, colour=gg_colour_hue(2*n_clusters)[2], 
                args=list(mean=gmm_c1_sd$centroids[2], sd=sqrt(gmm_c1_sd$covariance_matrices[2]))) +
  stat_function(fun=dnorm, n=100, colour=gg_colour_hue(2*n_clusters)[3], 
                args=list(mean=gmm_c1_sd$centroids[3], sd=sqrt(gmm_c1_sd$covariance_matrices[3]))) +
  stat_function(fun=dnorm, n=100, colour=gg_colour_hue(2*n_clusters)[4],
                args=list(mean=gmm_c2_sd$centroids[1], sd=sqrt(gmm_c2_sd$covariance_matrices[1]))) +
  stat_function(fun=dnorm, n=100, colour=gg_colour_hue(2*n_clusters)[5],
                args=list(mean=gmm_c2_sd$centroids[2], sd=sqrt(gmm_c2_sd$covariance_matrices[2]))) +
  stat_function(fun=dnorm, n=100, colour=gg_colour_hue(2*n_clusters)[6],
                args=list(mean=gmm_c2_sd$centroids[3], sd=sqrt(gmm_c2_sd$covariance_matrices[3]))) +
  theme_minimal()


# PCA scatter coloured by the sd-based sub-clusters
plot_points = datExpr_redDim %>% ggplot(aes_string(x='PC1', y='PC2', color=as.factor(clusterings[['Manual_sd']]))) + 
          geom_point() + theme_minimal()

grid.arrange(plot_gaussians, plot_points, ncol=2)

# Final distribution of genes across the six sd-based manual sub-clusters
clusterings[['Manual_sd']] %>% table
## .
## 1_1 1_2 1_3 2_1 2_2 2_3 
## 463 332 587 458 605 555
rm(c1_sd, c2_sd, gmm_c1_sd, gmm_c2_sd)
# Clean up the environment a bit
# NOTE(review): cc_output is created by a consensus-clustering chunk that is
# not shown in this document — confirm it exists before this rm() runs
rm(wss, datExpr_k_means, h_clusts, cc_output, best_k, ICA_output, ICA_clusters_names, signals_w_kurtosis, 
   n_clust, gmm, network, best_power, c, manual_clusters, manual_clusters_data, c1_mean, c2_mean, 
   gmm_c1_mean, gmm_c2_mean, p1, p2, dend_meta, plot_gaussians, plot_points, n_clusters, viridis_score_cols, 
   gg_colour_hue, create_viridis_dict)

Compare clusterings

Using Adjusted Rand Index:

# Pairwise Adjusted Rand Index between every pair of clusterings
# (only the upper triangle, including the diagonal, is filled)
cluster_sim = data.frame(matrix(nrow = length(clusterings), ncol = length(clusterings)))
for(i in seq_along(clusterings)){
  cluster1 = as.factor(clusterings[[i]])
  for(j in i:length(clusterings)){
    cluster2 = as.factor(clusterings[[j]])
    cluster_sim[i,j] = adj.rand.index(cluster1, cluster2)
  }
}
rownames(cluster_sim) = names(clusterings)
colnames(cluster_sim) = names(clusterings)

# Annotated heatmap of the (rounded) similarity matrix
cluster_sim = round(as.matrix(cluster_sim), 2)
heatmap.2(x = cluster_sim, Rowv = FALSE, Colv = FALSE, dendrogram = 'none', 
          cellnote = cluster_sim, notecol = 'black', trace = 'none', key = FALSE, 
          cexRow = 1, cexCol = 1, margins = c(7,7))

rm(i, j, cluster1, cluster2, cluster_sim)

Scatter plots

  • The simple clustering methods only consider the 1st component, dividing by vertical lines

  • GMM does vertical clusters when using the complete expression matrix but round, small clusters when using the reduced version

  • WGCNA clusters don’t seem to have a strong relation with the first principal components

  • SFARI genes seem to be everywhere (perhaps a bit more concentrated on the right side of the plot)

  • 1st PC seems to reflect the average level of expression of the genes

  • There seems to be a change in behaviour around PC1=0 (CC)

# Assemble the explorer table: first 3 PCs plus every clustering as a factor,
# joined with the DE statistics and each gene's average expression.
# NOTE(review): clusterings[['cc_l1']] (consensus clustering) and ICA_clusters
# are produced by earlier chunks (cc_l1 by one not shown here) — confirm both
# exist when this runs.
plot_points = datExpr_redDim %>% data.frame() %>% dplyr::select(PC1:PC3) %>%
              mutate(ID = rownames(.),                                 k_means = as.factor(clusterings[['km']]),
                hc = as.factor(clusterings[['hc']]),                   cc = as.factor(clusterings[['cc_l1']]),
                ica = as.factor(clusterings[['ICA_min']]),
                n_ica = as.factor(rowSums(ICA_clusters)),              gmm = as.factor(clusterings[['GMM']]),
                wgcna = as.factor(clusterings[['WGCNA']]),             manual = as.factor(clusterings[['Manual']]),
                manual_mean = as.factor(clusterings[['Manual_mean']]), manual_sd = as.factor(clusterings[['Manual_sd']]),   
                SFARI = as.factor(clusterings[['SFARI_score']]),       SFARI_bool = as.factor(clusterings[['SFARI_bool']]),
                syndromic = as.factor(clusterings[['syndromic']])) %>%
              bind_cols(DE_info[DE_info$ID %in% rownames(datExpr_redDim),]) %>% 
              # avg_expr: log2 mean expression, restricted to the genes kept in datExpr_redDim
              mutate(avg_expr = log2(rowMeans(datExpr)+1)[rownames(datExpr) %in% rownames(datExpr_redDim)])
rownames(plot_points) = plot_points$ID
# Interactive scatter-plot explorer (plotlyutils)
selectable_scatter_plot(plot_points, plot_points)




Save clusterings

clusterings_file = './../Data/Gandal/clusterings.csv'

# Append/overwrite the clustering columns in clusterings.csv, creating the
# file from scratch when it does not exist. Cluster 0 means "unassigned"
# and is stored as NA.
if(file.exists(clusterings_file)){

  df = read.csv(clusterings_file, row.names=1)
  
  if(!all(rownames(df) == rownames(datExpr))) stop('Gene ordering does not match the one in clusterings.csv!')
  
  for(clustering in names(clusterings)){
    # Bug fix: the original used sub(0, NA, x), a regex substitution that turns
    # e.g. cluster "10" into "1NA" and cluster "0" into the literal string "NA"
    # instead of a real missing value. Replace exact zeros with NA instead.
    cluster_labels = as.character(clusterings[[clustering]])
    cluster_labels[cluster_labels == '0'] = NA
    df = df %>% mutate(!!clustering := as.factor(cluster_labels))
    rownames(df) = rownames(datExpr)
  }
  
} else {
  
  df = clusterings %>% unlist %>% matrix(nrow=length(clusterings), byrow=TRUE) %>% t %>% data.frame %>% na_if(0)
  colnames(df) = names(clusterings)
  rownames(df) = rownames(datExpr)

}

write.csv(df, file=clusterings_file)

rm(clusterings_file, df, clustering)

Session info

sessionInfo()
## R version 3.5.2 (2018-12-20)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: Scientific Linux 7.6 (Nitrogen)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
##  [1] grid      stats4    parallel  stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] dendextend_1.10.0           gplots_3.0.1               
##  [3] pdfCluster_1.0-3            WGCNA_1.66                 
##  [5] fastcluster_1.1.25          dynamicTreeCut_1.63-1      
##  [7] ClusterR_1.1.8              fdrtool_1.2.15             
##  [9] moments_0.14                MineICA_1.22.0             
## [11] fastICA_1.2-1               Hmisc_4.1-1                
## [13] Formula_1.2-3               survival_2.43-3            
## [15] lattice_0.20-38             annotate_1.60.1            
## [17] XML_3.98-1.11               Rgraphviz_2.26.0           
## [19] igraph_1.2.4                colorspace_1.4-1           
## [21] mclust_5.4                  marray_1.60.0              
## [23] limma_3.38.3                cluster_2.0.7-1            
## [25] GOstats_2.48.0              graph_1.60.0               
## [27] Category_2.48.1             Matrix_1.2-15              
## [29] AnnotationDbi_1.44.0        IRanges_2.16.0             
## [31] S4Vectors_0.20.1            gtools_3.5.0               
## [33] biomaRt_2.38.0              xtable_1.8-3               
## [35] foreach_1.4.4               scales_1.0.0               
## [37] plyr_1.8.4                  Biobase_2.42.0             
## [39] BiocGenerics_0.28.0         JADE_2.0-1                 
## [41] ConsensusClusterPlus_1.46.0 GGally_1.4.0               
## [43] gridExtra_2.3               viridis_0.5.1              
## [45] viridisLite_0.3.0           RColorBrewer_1.1-2         
## [47] plotlyutils_0.0.0.9000      plotly_4.8.0               
## [49] glue_1.3.1                  reshape2_1.4.3             
## [51] forcats_0.3.0               stringr_1.4.0              
## [53] dplyr_0.8.0.1               purrr_0.3.1                
## [55] readr_1.3.1                 tidyr_0.8.3                
## [57] tibble_2.1.1                ggplot2_3.1.0              
## [59] tidyverse_1.2.1            
## 
## loaded via a namespace (and not attached):
##   [1] tidyselect_0.2.5            robust_0.4-18              
##   [3] RSQLite_2.1.1               htmlwidgets_1.3            
##   [5] trimcluster_0.1-2.1         BiocParallel_1.16.6        
##   [7] gmp_0.5-13.5                munsell_0.5.0              
##   [9] codetools_0.2-15            preprocessCore_1.44.0      
##  [11] withr_2.1.2                 knitr_1.22                 
##  [13] rstudioapi_0.7              geometry_0.4.0             
##  [15] robustbase_0.93-0           labeling_0.3               
##  [17] FD_1.0-12                   GenomeInfoDbData_1.2.0     
##  [19] bit64_0.9-7                 generics_0.0.2             
##  [21] xfun_0.5                    diptest_0.75-7             
##  [23] R6_2.4.0                    doParallel_1.0.14          
##  [25] GenomeInfoDb_1.18.2         clue_0.3-57                
##  [27] locfit_1.5-9.1              flexmix_2.3-15             
##  [29] bitops_1.0-6                reshape_0.8.7              
##  [31] DelayedArray_0.8.0          assertthat_0.2.1           
##  [33] promises_1.0.1              nnet_7.3-12                
##  [35] gtable_0.2.0                Cairo_1.5-9                
##  [37] rlang_0.3.2                 genefilter_1.64.0          
##  [39] splines_3.5.2               lazyeval_0.2.2             
##  [41] acepack_1.4.1               impute_1.56.0              
##  [43] broom_0.5.1                 checkmate_1.8.5            
##  [45] yaml_2.2.0                  abind_1.4-5                
##  [47] modelr_0.1.4                crosstalk_1.0.0            
##  [49] backports_1.1.2             httpuv_1.5.0               
##  [51] RBGL_1.58.2                 tools_3.5.2                
##  [53] Rcpp_1.0.1                  base64enc_0.1-3            
##  [55] progress_1.2.0              zlibbioc_1.28.0            
##  [57] RCurl_1.95-4.10             prettyunits_1.0.2          
##  [59] rpart_4.1-13                SummarizedExperiment_1.12.0
##  [61] haven_1.1.1                 magrittr_1.5               
##  [63] data.table_1.12.0           mvtnorm_1.0-7              
##  [65] whisker_0.3-2               matrixStats_0.54.0         
##  [67] mime_0.6                    hms_0.4.2                  
##  [69] evaluate_0.13               readxl_1.1.0               
##  [71] compiler_3.5.2              KernSmooth_2.23-15         
##  [73] crayon_1.3.4                htmltools_0.3.6            
##  [75] later_0.8.0                 mgcv_1.8-26                
##  [77] pcaPP_1.9-73                geneplotter_1.60.0         
##  [79] rrcov_1.4-3                 lubridate_1.7.4            
##  [81] DBI_1.0.0                   magic_1.5-9                
##  [83] MASS_7.3-51.1               fpc_2.1-11.1               
##  [85] ade4_1.7-11                 permute_0.9-4              
##  [87] cli_1.1.0                   gdata_2.18.0               
##  [89] GenomicRanges_1.34.0        pkgconfig_2.0.2            
##  [91] fit.models_0.5-14           foreign_0.8-71             
##  [93] xml2_1.2.0                  XVector_0.22.0             
##  [95] AnnotationForge_1.24.0      rvest_0.3.2                
##  [97] digest_0.6.18               vegan_2.5-2                
##  [99] rmarkdown_1.12              cellranger_1.1.0           
## [101] htmlTable_1.11.2            GSEABase_1.44.0            
## [103] kernlab_0.9-27              shiny_1.2.0                
## [105] modeltools_0.2-22           nlme_3.1-137               
## [107] jsonlite_1.6                pillar_1.3.1               
## [109] httr_1.4.0                  DEoptimR_1.0-8             
## [111] GO.db_3.7.0                 prabclus_2.2-7             
## [113] iterators_1.0.9             bit_1.1-14                 
## [115] class_7.3-14                stringi_1.4.3              
## [117] blob_1.1.1                  DESeq2_1.22.2              
## [119] latticeExtra_0.6-28         caTools_1.17.1             
## [121] memoise_1.1.0               ape_5.3